# coding=utf-8
# Copyright 2024 The Google Research Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# pylint: disable=line-too-long
"""Simple simulated linear regression experiment.

Runs RecursiveOptimizer on a linear regression problem.
Usage:
From google-research/
python -m recursive_optimizer.synthetic_experiment --optimizer=recursive --steps=10000 --learning_rate=1.0 --conditioning=min --inner_optimizer=SCINOL --eta=0.0 --tau=0.00001 --betting_domain=0.5 --epsilon=1.0 --epsilon_v=1.0

(pass --help instead of the arguments above to see the list of arguments with
their defaults)
"""
# pylint: enable=line-too-long

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from absl import app
from absl import flags

import numpy as np
import tensorflow.compat.v1 as tf
import time

from recursive_optimizer.recursive_optimizer import RecursiveOptimizer

FLAGS = flags.FLAGS

# Problem definition
flags.DEFINE_integer('dimension', 100, 'Dataset dimension.')
flags.DEFINE_integer('dataset_size', 1000, 'Dataset size.')
flags.DEFINE_float('noise_scale', 0.0, 'scale of noise in data.')
flags.DEFINE_float('skewness', 750.0, 'Amount of ill-conditioning in data.')
flags.DEFINE_float('distance', 10.0, 'Norm of optimal weights.')
# The adjacent string literals below are concatenated, so the first one must
# end with a space.
flags.DEFINE_enum(
    'conditioning', 'min', ['min', 'max'],
    'Whether to set the target to be the minimum or maximum eigenvalue of the '
    'covariance matrix.')
# 'abs' selects the absolute loss; any other value selects the raw linear
# prediction as the loss (see get_train_step and eval_model).
flags.DEFINE_string('loss', 'abs', 'Loss function to use.')
# Optimization
flags.DEFINE_integer('steps', 200000, 'Number of train steps.')
flags.DEFINE_enum('optimizer', 'recursive', ['recursive', 'adagrad'],
                  'Which optimizer to test.')
# The column order in the help text matches the rows written by train_model.
flags.DEFINE_string(
    'data_output', '',
    'If not empty then save Step,Value,Wall time details into this CSV file.')

# Remaining flags configure the optimizer.
# See parameters of RecursiveOptimizer's constructor in recursive_optimizer.py
# for their description.

# Should be set to 0.01 for Adagrad.
flags.DEFINE_float('learning_rate', 1.0, 'learning rate')
flags.DEFINE_float('tau', 0.0, 'tau')
flags.DEFINE_float('g_max', 0.0, 'g_max')
flags.DEFINE_string('inner_optimizer', 'SCINOL', 'inner_optimizer')
# The only non-theoretically sound part of the code takes a minimum between eta
# and some other quantity. This is useful in practice in deep learning tasks,
# but for simulations to test the pure theory, we can turn it off
# by setting eta to be very large.
flags.DEFINE_float('eta', 10000000.0, 'eta')
flags.DEFINE_float('epsilon', 1.0, 'epsilon')
flags.DEFINE_float('epsilon_v', 1.0, 'epsilon_v')
flags.DEFINE_bool('add_average', False, 'add_average')
flags.DEFINE_float('betting_domain', 0.5, 'betting_domain')


def make_features(dataset_size, dimension, optimal_weights, root_covariance,
                  noise_scale):
  """Creates synthetic linear regression dataset.

  Rows are drawn from a standard normal distribution, divided by
  sqrt(dimension) and multiplied by root_covariance.T. The features are then
  rescaled so that the mean squared label equals 1, i.e. the loss at the
  origin is normalized. Labels are the noise-free product of the rescaled
  features and optimal_weights.

  Args:
    dataset_size: number of rows in the dataset.
    dimension: number of columns.
    optimal_weights: target weight vector (numpy array).
    root_covariance: scaling matrix for dataset. Before the final rescaling,
      the features are Gaussian rows multiplied by root_covariance.T.
    noise_scale: currently unused. No noise is added to the labels.

  Returns:
    features, labels: float32 tf.constant tensors.
  """
  un_skewed = np.random.normal(0.0, 1.0,
                               [dataset_size, dimension]) / np.sqrt(dimension)
  skewed = np.dot(un_skewed, root_covariance.T)

  # The diagnostics printed below describe the data *before* normalization.
  labels = np.dot(skewed, optimal_weights)
  product_sum = np.sum(labels**2)
  coord_sum = np.sum(np.sqrt(np.sum(skewed * skewed, axis=0)))

  # Normalize so that the mean squared label (the squared loss at the origin)
  # is 1. Without this step the data would instead be scaled only by the norm
  # of the target weights, which is the parameter that shows up in regret
  # bounds.
  skewed = skewed / np.linalg.norm(labels) * np.sqrt(dataset_size)
  labels = np.dot(skewed, optimal_weights)

  print('label norm: ', np.sum(labels * labels) / dataset_size)
  features_tensor = tf.constant(skewed, dtype=tf.float32)
  labels_tensor = tf.constant(labels, dtype=tf.float32)

  # NOTE: label noise is intentionally not applied here; noise_scale is
  # accepted only so that callers can keep passing it.

  print('product sum: ', product_sum)
  print('coord sum: ', coord_sum)
  return features_tensor, labels_tensor


def generate_random_root_covariance(skewness, dimension):
  """Creates a random root covariance matrix for use in make_features.

  The matrix is built as U.dot(D).dot(V.T), where U and V are the orthogonal
  factors of QR decompositions of random Gaussian matrices and D is diagonal
  with squared entries spaced geometrically from 1/skewness to 1.

  Args:
    skewness: parameter indicating how much the eigenvalues of the matrix should
      decay.
    dimension: dimension of covariance matrix.

  Returns:
    root_covariance: the [dimension, dimension] root covariance matrix.
    target: eigenvector of root_covariance.dot(root_covariance.T) belonging to
      the smallest eigenvalue when FLAGS.conditioning is 'min', and to the
      largest eigenvalue otherwise.
  """
  print('skewness: ', skewness)
  print('dimension: ', dimension)
  singular_values = np.diag(
      np.sqrt(np.exp(np.linspace(0.0, np.log(skewness), dimension)) / skewness))

  left_basis, _ = np.linalg.qr(
      np.random.normal(0.0, 1.0, [dimension, dimension]))
  right_basis, _ = np.linalg.qr(
      np.random.normal(0.0, 1.0, [dimension, dimension]))

  root_covariance = left_basis.dot(singular_values).dot(right_basis.T)

  # The covariance is symmetric, so use the symmetric eigensolver: it always
  # returns real eigenvalues (in ascending order) and real eigenvectors,
  # whereas the general solver may return complex values due to round-off.
  _, evecs = np.linalg.eigh(root_covariance.dot(root_covariance.T))

  if FLAGS.conditioning == 'min':
    target = evecs[:, 0].flatten()  # Smallest eigenvalue comes first.
  else:
    target = evecs[:, -1].flatten()  # Largest eigenvalue comes last.
  return root_covariance, target


def generate_optimal_weights(dimension, distance):
  """Samples target weights of norm `distance` in a random direction.

  Args:
    dimension: number of entries in the weight vector.
    distance: Euclidean norm of the returned weights.

  Returns:
    A float tensor of shape [dimension, 1].
  """
  gaussian = np.random.normal(0.0, 1.0, [dimension])
  unit_vector = gaussian / np.linalg.norm(gaussian)
  column = np.reshape(distance * unit_vector, [dimension, 1])
  return tf.to_float(tf.constant(column))


def get_train_step(weights, training_data, optimizer, wealth):
  """Builds the op that performs one training step on a single example.

  Args:
    weights: variable holding the model weights; it is flattened before being
      multiplied with the features.
    training_data: indexable pair (features, label) for one example.
    optimizer: object whose minimize(loss, var_list=...) builds the update op.
    wealth: variable that accumulates the per-step loss.

  Returns:
    An op grouping the optimizer update and the wealth accumulation.
  """
  features, label = training_data[0], training_data[1]
  flat_weights = tf.reshape(weights, [-1])
  prediction = tf.reduce_sum(features * flat_weights)
  if FLAGS.loss == 'abs':
    loss = tf.abs(prediction - label)
  else:
    # Any other loss name uses the raw linear prediction as the loss.
    loss = prediction
  accumulate_loss = tf.assign_add(wealth, loss)
  # The optimizer update is built so that it runs after the loss has been
  # added to wealth.
  with tf.control_dependencies([accumulate_loss]):
    minimize_op = optimizer.minimize(loss, var_list=[weights])
  return tf.group(minimize_op, accumulate_loss)


def eval_model(weights, data, dataset_size):
  """Builds the average loss of `weights` over the whole dataset.

  Args:
    weights: weight tensor that is matrix-multiplied with the features.
    data: indexable pair (features, labels).
    dataset_size: divisor applied to every per-example loss before summing.

  Returns:
    Scalar tensor: the mean absolute error when FLAGS.loss is 'abs', otherwise
    the mean of the raw linear predictions.
  """
  features, labels = data[0], data[1]
  predictions = tf.matmul(features, weights)
  if FLAGS.loss != 'abs':
    return tf.reduce_sum(predictions / dataset_size)
  residuals = tf.reshape(predictions, [-1]) - labels
  return tf.reduce_sum(tf.abs(residuals) / dataset_size)


def train_model(optimizer, dimension, dataset_size, root_covariance, scale,
                optimal_weights, steps):
  """Generates synthetic data and trains the model.

  If FLAGS.data_output is non-empty, a CSV file with the columns
  Step, Value, Wall time is written, with one row every 1000 steps holding
  the full-dataset loss. The file and the TensorFlow session are always closed
  before returning, even if training raises.

  Args:
    optimizer: optimizer for use in training.
    dimension: dimension of features.
    dataset_size: size of synthetic dataset.
    root_covariance: root covariance matrix of data features.
    scale: noise scale; forwarded to make_features.
    optimal_weights: target weight vector.
    steps: number of training steps.

  Returns:
    final_difference: scaled norm of difference between learned and optimal
      weights.
    final_loss: final training loss.
    final_weights: learned weights.
    optimal_weights_eval: optimal_weights evaluated (not as a Tensor).
  """
  outputfile = None
  session = None
  try:
    if FLAGS.data_output:
      outputfile = open(FLAGS.data_output, 'w')
      outputfile.write('{}, {}, {}\n'.format('Step', 'Value', 'Wall time'))
    full_batch = make_features(dataset_size, dimension, optimal_weights,
                               root_covariance, scale)
    optimal_weights = tf.constant(optimal_weights, dtype=tf.float32)
    training_data = tf.data.Dataset.from_tensor_slices(full_batch)
    weights = tf.Variable(tf.zeros([dimension, 1]))
    wealth = tf.Variable(0.0)
    # One example per step, drawn from a shuffled, endlessly repeated dataset.
    iterator = training_data.shuffle(
        buffer_size=1000).repeat().make_initializable_iterator()
    example = iterator.get_next()
    train_step = get_train_step(weights, example, optimizer, wealth)
    global_initializer = tf.global_variables_initializer()
    session = tf.Session()
    iter_initializer = iterator.initializer

    difference = tf.norm(
        tf.reshape(optimal_weights, [-1]) -
        tf.reshape(weights, [-1])) / tf.norm(optimal_weights)
    eval_loss = eval_model(weights, full_batch, dataset_size)
    # Loss of the all-zero weight vector, reported as the initial loss.
    eval_zero = eval_model(0 * weights, full_batch, dataset_size)
    session.run(global_initializer)
    session.run(iter_initializer)

    for stepnum in range(steps):
      if stepnum % 50000 == 0:
        print('iteration: ', stepnum)
      session.run(train_step)
      if outputfile is not None and stepnum % 1000 == 0:
        current_eval = session.run(eval_loss)
        outputfile.write(
            '{}, {}, {}\n'.format(stepnum, current_eval, time.time()))
    final_wealth = session.run(wealth)
    print('final wealth: ', final_wealth)
    final_difference = session.run(difference)
    final_loss = session.run(eval_loss)
    initial_loss = session.run(eval_zero)
    final_weights = session.run(tf.reshape(weights, [-1]))
    optimal_weights_eval = session.run(tf.reshape(optimal_weights, [-1]))
  finally:
    # Release the session and flush/close the CSV file on every exit path.
    if session is not None:
      session.close()
    if outputfile is not None:
      outputfile.close()
  print('initial loss: ', initial_loss)
  return final_difference, final_loss, final_weights, optimal_weights_eval


def train_and_report(dimension, dataset_size, skewness, scale, distance, steps,
                     optimizer):
  """Builds a random problem instance and trains on it.

  A random root covariance and its target eigenvector are generated, the
  eigenvector is scaled by `distance` to form the optimal weights, and the
  model is trained with train_model.

  Args:
    dimension: dimension of features.
    dataset_size: size of dataset.
    skewness: parameter controlling eigenvalue decay of covariance. 1.0
      indicates no decay, higher indicates more decay.
    scale: noise scale; forwarded unchanged to train_model.
    distance: factor by which the target eigenvector is multiplied.
    steps: number of train steps.
    optimizer: optimizer to use in training.

  Returns:
    The tuple returned by train_model:
    final_difference: scaled norm of difference between learned and optimal
      weights.
    final_loss: final training loss.
    final_weights: learned weights.
    optimal_weights_eval: optimal_weights evaluated (not as a Tensor).
  """
  root_covariance, target_direction = generate_random_root_covariance(
      skewness, dimension)
  scaled_target = target_direction * distance
  return train_model(optimizer, dimension, dataset_size, root_covariance, scale,
                     scaled_target, steps)


def main(argv):
  """Builds the optimizer selected by flags, trains, and prints the results."""
  del argv  # Unused.

  # Both optimizers are constructed; FLAGS.optimizer picks which one is used.
  recursive_optimizer = RecursiveOptimizer(
      betting_domain=FLAGS.betting_domain,
      eta=FLAGS.eta,
      tau=FLAGS.tau,
      epsilon=FLAGS.epsilon,
      epsilon_v=FLAGS.epsilon_v,
      lr=FLAGS.learning_rate,
      g_max=FLAGS.g_max,
      inner_optimizer=FLAGS.inner_optimizer,
      add_average=FLAGS.add_average)
  adagrad_optimizer = tf.train.AdagradOptimizer(FLAGS.learning_rate)
  optimizer = (
      recursive_optimizer
      if FLAGS.optimizer == 'recursive' else adagrad_optimizer)

  # The learned and optimal weights are returned as well but not printed.
  results, final_loss, _, _ = train_and_report(
      FLAGS.dimension, FLAGS.dataset_size, FLAGS.skewness, FLAGS.noise_scale,
      FLAGS.distance, FLAGS.steps, optimizer)
  print('Using Optimizer: ', FLAGS.optimizer)
  print('final difference: ', results)
  print('final loss: ', final_loss)


# Script entry point: hand main to absl's app.run when executed directly.
if __name__ == '__main__':
  app.run(main)
